//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Tables and figures
// Table 2, 3, 4, 5, A1, A2, A3, A4, A5, A6, A7
// Figure 2, A5, A6
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
set more off

// Directory settings (these need to be modified for each user).
// The globals are assigned with the copy form (no "=" sign): with "=", Stata
// evaluates the right-hand side as a string expression, and the result of a
// string expression can be truncated in older Stata releases. That matters for
// the long variable list stored in $char below.
	gl data_path "/Users/thomaskim/Documents/malawi/selectionpaper/data/old_dta"
	gl result_path "/Users/thomaskim/Documents/malawi/selectionpaper/data/result"
	gl graph_path "/Users/thomaskim/Documents/malawi/selectionpaper/data/figures"

// Used data: enumerator_level.dta, household_level.dta, secondary_sch_survey.dta

// 2014 Baseline survey individual characteristics (varlist reused by the balance tables)
gl char "age num_siblings asset_score2 current_workstatus2 rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe ability_index31 ability_index32 mcoffer2 eduoffer2 cct fortoday"


//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table 2: Randomization Balance Check
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Write this table to its own named log. A leftover log with the same name is
// closed first; using a name means that neither the open nor the close below can
// silently act on some other log that happens to be open.
cap log close tab2
log using "$result_path/tab2_balance.txt", text replace name(tab2)
use "$data_path/enumerator_level.dta", clear

// Panel A: 2014 baseline survey
//// First stage randomization balance check (columns (2)-(4))
preserve
keep if first_stage==1 | first_stage==2
foreach variable of varlist $char {
	display "Summary stats of the variable `variable' "
	sum `variable'
	display ""
	display "t test-based balance check for `variable' btw Career & Wage"
	ttest `variable', by(first_stage) unequal
}
restore

//// Second stage randomization balance check (columns (5), (6))
//// First pass: G1 vs G2; second pass: G3 vs G4
foreach pair in "1 2" "3 4" {
	tokenize `pair'
	local lo `1'
	local hi `2'
	preserve
	keep if second_stage==`lo' | second_stage==`hi'
	foreach variable of varlist $char {
		display "Summary stats of the variable `variable' "
		sum `variable'
		display ""
		// Label names the two second-stage groups actually compared
		display "t test-based balance check for `variable' btw G`lo' & G`hi'"
		ttest `variable', by(second_stage) unequal
	}
	restore
}

// Panel B: Characteristics of dispatched catchment areas
use "$data_path/household_level.dta", clear
// One row per id: means of the catchment-area variables and of the stage indicators
collapse (mean) hh_hsa_enu distance1 num_hhmember_new asset_score_hsa_new birth_hsa_new death_hsa_new malaria_hsa_new second_stage first_stage, by(id)
local hsa hh_hsa_enu distance1 num_hhmember_new asset_score_hsa_new birth_hsa_new death_hsa_new malaria_hsa_new

//// First stage randomization balance check (columns (2)-(4))
preserve
drop if first_stage==. | first_stage==3
display "First stage balance check "
foreach variable of varlist `hsa' {
	display "Summary stats of the variable `variable' "
	sum `variable'
	display ""
	display "t test-based balance check for `variable' btw Career & Wage"
	ttest `variable', by(first_stage) unequal
}
restore

//// Second stage randomization balance check (columns (5), (6))
//// First pass: G1 vs G2 ("career group"); second pass: G3 vs G4 ("financial group")
foreach spec in "1 2 career" "3 4 financial" {
	tokenize `spec'
	local lo `1'
	local hi `2'
	local grp `3'
	preserve
	keep if second_stage==`lo' | second_stage==`hi'
	display "Second stage balance check `grp' group"
	foreach variable of varlist `hsa' {
		display "Summary stats of the variable `variable' "
		sum `variable'
		display ""
		// Label names the two second-stage groups actually compared
		display "t test-based balance check for `variable' btw G`lo' & G`hi'"
		ttest `variable', by(second_stage) unequal
	}
	restore
}
log close tab2



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table 3. Job Offer Acceptance by Individual Trait
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variable is train_accept4, a binary variable with 0 if a subject rejects our job offer and 1 if accepts our job offer.
use "$data_path/enumerator_level.dta", clear

// Groups of individual traits; each trait is interacted with the first-stage dummy in turn
local demo age num_siblings asset_score2 current_workstatus2
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local cog ability_index31 ability_index32
local hiv cct mcoffer2 eduoffer2 
local exp avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe

preserve
drop if first_stage==3
// Omit first_stage==2 so that xi creates the dummy _Ifirst_sta_1
char first_stage[omit] 2
xi i.first_stage

// First column: first-stage dummy only (plus the `hiv' controls, which are not reported).
// The drop() lists no longer mention `indi', a local that is never defined in
// this section and therefore expanded to nothing.
reg train_accept4 _Ifirst_sta_1 `hiv', robust
outreg2 using "$result_path/tab3_joboffer.xls", symbol(***, **, *) dec(3) excel replace drop(`hiv' o.*)

// One further column per trait: trait, and trait x first-stage dummy
foreach v of varlist `demo' `noncog' `exp' `cog' {
	g `v'_inter=`v'*_Ifirst_sta_1
	reg train_accept4 _Ifirst_sta_1 `v' `v'_inter `hiv', robust
	outreg2 using "$result_path/tab3_joboffer.xls", symbol(***, **, *) dec(3) excel append drop(`hiv' o.*)
}
sum `demo' `noncog' `exp' `cog'
restore



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table 4. Training Performance
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variables are quiz (quiz score) and practice_error (practice survey error rate).
use "$data_path/enumerator_level.dta", clear

// Practice survey type (A & B): missing unless train_accept4==1, then 1 for type "A" and 0 otherwise
g mocktype_a=.
replace mocktype_a=0 if train_accept4==1
replace mocktype_a=1 if train_accept4==1 & surveytype=="A"

// Mock test pair fixed effects. The dummies are created separately for gp "CI"
// (pairs 1-44) and gp "CM" (pairs 1-54); the list of their names is collected
// in `pairfix' as they are created instead of being typed out by hand.
local pairfix
forval i=1/44 {
	g pairfixci`i'=(surveypair==`i')
	replace pairfixci`i'=0 if gp=="CM"
	local pairfix `pairfix' pairfixci`i'
}
forval i=1/54 {
	g pairfixcm`i'=(surveypair==`i')
	replace pairfixcm`i'=0 if gp=="CI"
	local pairfix `pairfix' pairfixcm`i'
}

local demo i.age asset_score2
local cog ability_index31 ability_index32 
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local hiv cct mcoffer2 eduoffer2
local indiv `demo' `cog' `noncog'

// Coefficients kept out of the exported table. The original lists also named
// `indi', which is not defined in this section (the local is called `indiv')
// and expanded to nothing; it is removed here.
local hide_quiz o.* `hiv' `demo' `cog' `noncog' num_siblings
local hide_err `hiv' o.* `demo' `cog' `noncog' `pairfix' mocktype_a num_siblings

// Panel A: 148 Trainee Sample
// Panel B: 137 Enumerator Sample
// Both panels run the same five regressions; only the sample restriction differs.
// The very first export replaces the output file, every later one appends.
local mode replace
foreach panel in A B {
	preserve
	if "`panel'"=="A" {
		drop if first_stage==3
		drop if train_accept4==0
	}
	else {
		drop if second_stage==. | second_stage==5
	}
	char first_stage [omit] 2
	xi i.first_stage

	reg quiz _Ifirst_sta_1 `hiv' num_siblings, robust
	outreg2 using "$result_path/tab4_trainoutcome.xls", symbol(***, **, *) auto(3) excel `mode' drop(`hide_quiz')
	local mode append
	reg quiz _Ifirst_sta_1 `hiv' num_siblings `indiv', robust
	outreg2 using "$result_path/tab4_trainoutcome.xls", symbol(***, **, *) auto(3) excel append drop(`hide_quiz')
	reg practice_error _Ifirst_sta_1 `hiv' mocktype_a num_siblings, robust
	outreg2 using "$result_path/tab4_trainoutcome.xls", symbol(***, **, *) auto(3) excel append drop(`hide_err')
	reg practice_error _Ifirst_sta_1 `hiv' mocktype_a num_siblings `indiv', robust
	outreg2 using "$result_path/tab4_trainoutcome.xls", symbol(***, **, *) auto(3) excel append drop(`hide_err')
	reg practice_error _Ifirst_sta_1 `hiv' mocktype_a `pairfix' num_siblings `indiv', robust
	outreg2 using "$result_path/tab4_trainoutcome.xls", symbol(***, **, *) auto(3) excel append drop(`hide_err')

	// Outcome summaries for the omitted first-stage category (first_stage==2)
	foreach v of varlist practice_error quiz {
		sum `v' if first_stage==2
	}
	restore
}

	

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table 5 Selection and Causal Effects of Work Incentives on Job Performance: Main Outcomes
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variables are error_rate (survey quality) and num_dailysurvey (survey quantity).
use "$data_path/household_level.dta", clear

// Control-variable groups shared by both outcomes
local fixed_day
forval d=1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2 
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local supervision after1_team1 after2_team1 after1_team2 after2_team2 after1_team3 after2_team3 after1_team4 after2_team4 after1_team5 after2_team5
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Second-stage group dummies G1-G3 (second_stage==4 is the omitted category)
char second_stage[omit] 4
xi i.second_stage
forval g=1/3 {
	rename _Isecond_st_`g' G`g'
}

// Coefficients that are estimated but not exported
local hide `indi' `fixed_day' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new

// 1. Survey quality (error rate)
// Each panel is "<group dummy> <second_stage value> <second_stage value>":
//   Panel A: Selection effect (G2 vs G3)
//   Panel B: Causal effect of career incentives (G3 vs. G4)
//   Panel C: Causal effect of wage (G1 vs. G2)
//   Panel D: Combined effect (G1 vs. G4)
// Within a panel: spec 1 = base controls, spec 2 adds `indi', spec 3 adds `indi' and `train'.
local base `fixed_day' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings
local mode replace
foreach panel in "G2 2 3" "G3 4 3" "G2 2 1" "G1 4 1" {
	tokenize `panel'
	local treat `1'
	local arm_a `2'
	local arm_b `3'
	forval spec=1/3 {
		local extra
		if `spec'==2 local extra `indi'
		if `spec'==3 local extra `indi' `train'
		reg error_rate `treat' `base' `extra' if second_stage==`arm_a' | second_stage==`arm_b', robust cl(hsa)
		outreg2 using "$result_path/tab5_jobperformance.xls", symbol(***, **, *) dec(3) excel `mode' drop(`hide')
		// only the first export replaces the file
		local mode append
	}
}
forval g=1/4 {
	sum error_rate if second_stage==`g'
}

// 2. Survey quantity (num_dailysurvey)
//// Aggregate into enumerator-day level
collapse (mean) quiz distance1 practice_error G1 G2 G3 avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe attitude num_dailysurvey second_stage hsa `fixed_day' age asset_score2 num_siblings `cog' `noncog' first_superv1 first_superv2 first_superv3 first_superv4 first_superv5 second_superv1 second_superv2 second_superv3 second_superv4 second_superv5 num_hhmember_new malaria_hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new death_hsa_new error_rate `supervision' `hiv', by(datenum id)
replace datenum=2 if datenum==1
replace datenum=datenum-1
g id_workday=id*10000+datenum

// The control-group locals defined above are still in scope and unchanged.
// Here num_hhmember_new enters linearly rather than as i.num_hhmember_new.
local base `fixed_day' `hsa_new' `supervision' `hiv' num_hhmember_new num_siblings
foreach v of varlist num_dailysurvey {
	// Same four panels and three specifications as for the error rate; all exports append
	foreach panel in "G2 2 3" "G3 4 3" "G2 2 1" "G1 4 1" {
		tokenize `panel'
		local treat `1'
		local arm_a `2'
		local arm_b `3'
		forval spec=1/3 {
			local extra
			if `spec'==2 local extra `indi'
			if `spec'==3 local extra `indi' `train'
			reg `v' `treat' `base' `extra' if second_stage==`arm_a' | second_stage==`arm_b', robust cl(hsa)
			outreg2 using "$result_path/tab5_jobperformance.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}
forval g=1/4 {
	sum num_dailysurvey if second_stage==`g'
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////	
// Table A.1: Randomization balance check between treatment and non-selected groups
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dedicated named log: a leftover log with this name is closed first, and the
// close at the end cannot affect any other open log.
cap log close tabA1
log using "$result_path/tabA1_balance1.txt", text replace name(tabA1)

// Panel A: 2011 secondary school survey
use "$data_path/secondary_sch_survey.dta", clear
g grp00=0
replace grp00=1 if grp=="CI1" | grp=="CI2" | grp=="CM1" | grp=="CM2"	// Those who are invited to the 2014 baseline survey
preserve
foreach v of varlist height_b weight_b q103age_b living_with_father living_with_mother asset_score self_health_good raven11_score {
	display "Summary stats of `v' "
	sum `v'
	display ""
	// The comparison is by grp00 (invited vs. not invited), so the label says so
	display "t test-based balance check for `v' btw invited & non-invited"
	ttest `v', by(grp00) unequal
	display ""
}
restore

// Panel B: 2014 baseline survey
use "$data_path/enumerator_level.dta", clear
preserve
// itt_treat flags first_stage==3; every other observation is coded 0
g itt_treat=0
replace itt_treat=1 if first_stage==3
foreach variable of varlist $char {
	display "Summary stats of the variable `variable' "
	sum `variable'
	display ""
	display "t test-based balance check for `variable' btw ITT treat vs control"
	ttest `variable', by(itt_treat) unequal
}
restore
log close tabA1



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.2: Individual characteristics between baseline survey participants and non-participants
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dedicated named log: a leftover log with this name is closed first, and the
// close at the end cannot affect any other open log.
cap log close tabA2
log using "$result_path/tabA2_balance2.txt", text replace name(tabA2)

// 2011 secondary school survey
use "$data_path/secondary_sch_survey.dta", clear
g grp00=0
replace grp00=1 if grp=="CI1" | grp=="CI2" | grp=="CM1" | grp=="CM2"	// Those who are invited to the 2014 baseline survey

preserve
// Keep only those invited to the 2014 baseline survey
drop if grp00==0
// total0: 1 by default, 0 for those who were not reachable or refused
g total0=1
replace total0=0 if confirm==0 | confirm==2
** confirm==1 if subjects participated in the 2014 baseline survey
** confirm==0 if subjects were not reachable and confirm==2 if refused to participate in the 2014 baseline survey 

foreach v of varlist height_b weight_b q103age_b living_with_father living_with_mother asset_score self_health_good raven11_score {
	display "Summary stats of `v' "
	sum `v'
	display ""
	// The comparison is by total0 (participants vs. non-participants), so the label says so
	display "t test-based balance check for `v' btw baseline survey participants & non-participants"
	ttest `v', by(total0) unequal
	display ""
}
restore
log close tabA2



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.3: Characteristics difference between enumerators by the supervision teams
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dedicated named log: a leftover log with this name is closed first, and the
// close at the end cannot affect any other open log.
cap log close tabA3
log using "$result_path/tabA3_balance3.txt", text replace name(tabA3)

// 2014 baseline survey
use "$data_path/enumerator_level.dta", clear
rename team_visit1 firstvisit

// Summary statistics for the reference group (firstvisit==1)
foreach v of varlist $char {
	sum `v' if firstvisit==1
}

// Compare supervision group 1 to the other supervision group `i'
forval i=2/5 {
	preserve
	keep if firstvisit==1 | firstvisit==`i'
	foreach variable of varlist $char {
		display ""
		// Label names the two supervision groups actually compared
		display "t test-based balance check for `variable' btw supervision group 1 & `i'"
		ttest `variable', by(firstvisit) unequal
	}
	restore
}
log close tabA3



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.4: Individual characteristics after job offer acceptance
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dedicated named log: a leftover log with this name is closed first, and the
// close at the end cannot affect any other open log.
cap log close tabA4
log using "$result_path/tabA4_balance4.txt", text replace name(tabA4)
use "$data_path/enumerator_level.dta", clear

// 2014 baseline survey
preserve
drop if first_stage==3			// drop control group
drop if train_accept4==0		// drop those who refuse the internship offer or wage offer

// Characteristics compared across the two remaining first-stage groups
local traits age bmi num_siblings asset_score2 current_workstatus2 rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe ability_index31 ability_index32 mcoffer2 eduoffer2 cct fortoday
foreach variable of varlist `traits' {
	display "Summary stats of the variable `variable' "
	sum `variable'
	display ""
	display "t test-based balance check for `variable' btw career vs wage acceptance"
	ttest `variable', by(first_stage) unequal
}

// F-test: regress the first-stage dummy on all characteristics jointly
// (first_stage==2 is the omitted category, so xi creates _Ifirst_sta_1)
char first_stage[omit] 2
xi i.first_stage
reg _Ifirst_sta_1 age bmi num_siblings asset_score2 current_workstatus2 rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences avgfrac_distant_tpe ccei_combined_tpe avgfrac_risk ability_index31 ability_index32 mcoffer2 eduoffer2 cct fortoday
restore
log close tabA4



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.5. Selection and incentive effects of work incentives on job performance: subjective performance evaluation
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variables are SPE by survey respondents (pes3q) and SPE by supervisors (attitude).
use "$data_path/household_level.dta", clear

// Second-stage group dummies, with second_stage==4 as the omitted category.
// They are renamed to G1-G3 before xi is called again further down.
char second_stage[omit] 4
xi i.second_stage
forval g=1/3 {
	rename _Isecond_st_`g' G`g'
}

// 1. Subjective performance evaluation (by survey respondents)
// PES enumerator fixed effect
//// Two names carry trailing blanks; normalise them before encoding
replace enumerator_pes="ISAIAH" if enumerator_pes=="ISAIAH  "
replace enumerator_pes="HENRY" if enumerator_pes=="HENRY "
//// Numeric code for each PES enumerator, then dummies with category 32 omitted
encode enumerator_pes, generate(enu_pes)
char enu_pes [omit] 32
xi i.enu_pes

// drop when the PES respondents are not the census respondents
// (the matching restore is not visible in this part of the file)
preserve
keep if pes2q=="1"

// Destring the outcome variable, pes3q
// The listed string values are first set to "." (missing) so that destring can
// convert the variable to numeric.
foreach v of varlist pes3q {
replace `v'="." if `v'=="0" | `v'=="6" | `v'=="2015-06-03" | `v'=="2015-06-02" | `v'=="2015-06-22" | `v'=="55" | `v'=="9" | `v'=="francis maliwa"
destring `v', replace
}

// Control-variable groups used by the regressions that follow.
// fixed_pes: PES enumerator dummies 1-31 (category 32 is the omitted one, see the char setting above)
local fixed_pes _Ienu_pes_1 _Ienu_pes_2 _Ienu_pes_3 _Ienu_pes_4 _Ienu_pes_5 _Ienu_pes_6 _Ienu_pes_7 _Ienu_pes_8 _Ienu_pes_9 _Ienu_pes_10 _Ienu_pes_11 _Ienu_pes_12 _Ienu_pes_13 _Ienu_pes_14 _Ienu_pes_15 _Ienu_pes_16 _Ienu_pes_17 _Ienu_pes_18 _Ienu_pes_19 _Ienu_pes_20 _Ienu_pes_21 _Ienu_pes_22 _Ienu_pes_23 _Ienu_pes_24 _Ienu_pes_25 _Ienu_pes_26 _Ienu_pes_27 _Ienu_pes_28 _Ienu_pes_29 _Ienu_pes_30 _Ienu_pes_31
// fixed_day: the day1-day44 dummies
local fixed_day day1 day2 day3 day4 day5 day6 day7 day8 day9 day10 day11 day12 day13 day14 day15 day16 day17 day18 day19 day20 day21 day22 day23 day24 day25 day26 day27 day28 day29 day30 day31 day32 day33 day34 day35 day36 day37 day38 day39 day40 day41 day42 day43 day44
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
// NOTE(review): unlike the Table 5 definition, demo here also lists distance1,
// which is already part of hsa_new -- confirm this is intended.
local demo i.age asset_score2  distance1
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local supervision after1_team1 after2_team1 after1_team2 after2_team2 after1_team3 after2_team3 after1_team4 after2_team4 after1_team5 after2_team5
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
// indi: all individual-level controls (demo + cog + noncog)
local indi `demo' `cog' `noncog'
foreach v of varlist pes3q  {
//// Panel A: Selection effect (G2 vs G3)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings if second_stage==2 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel replace drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' if second_stage==2 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' `train' if second_stage==2 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
//// Panel B: Causal effect of  career incentives (G3 vs. G4)
reg `v' G3 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings if second_stage==4 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G3 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' if second_stage==4 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G3 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' `train' if second_stage==4 | second_stage==3, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
//// Panel C: Causal effect of  wage (G1 vs. G2)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings if second_stage==2 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' if second_stage==2 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G2 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' `train' if second_stage==2 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
//// Panel D: Combined effect (G1 vs. G4)
reg `v' G1 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings if second_stage==4 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G1 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' if second_stage==4 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
reg `v' G1 `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings `indi' `train' if second_stage==4 | second_stage==1, robust cl(hsa)
outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`indi' `fixed_day' `fixed_pes' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new)
}
sum pes3q if second_stage==1
sum pes3q if second_stage==2
sum pes3q if second_stage==3
sum pes3q if second_stage==4
restore

// 2. Subjective performance evaluation (by supervisors)
// Outcome: attitude. All columns are appended to tabA5_SPE.xls.

// Variable groups (defined before the collapse because its variable list reuses them)
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
// first_superv1-5 followed by second_superv1-5
local supervision
foreach round in first second {
	forvalues t = 1/5 {
		local supervision `supervision' `round'_superv`t'
	}
}
// after1_team#/after2_team# are carried through the collapse but not used as regressors here
local team_after
forvalues t = 1/5 {
	local team_after `team_after' after1_team`t' after2_team`t'
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

//// Aggregate into enumerator level (one row per id, each variable replaced by its mean)
collapse (mean) quiz distance1 practice_error G1 G2 G3 avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe ///
	attitude num_dailysurvey second_stage hsa `fixed_day' age asset_score2 num_siblings `cog' `noncog' ///
	`supervision' num_hhmember_new malaria_hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new death_hsa_new ///
	error_rate `team_after' `hiv', by(id)

// Controls common to all columns, and the coefficients hidden from the exported table
local controls `supervision' `hsa_new' `hiv' num_hhmember_new num_siblings
local hide `indi' `fixed_day' datenum o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' `train' num_siblings i.num_hhmember_new num_hhmember_new

// Additional controls per column: (1) none, (2) individual characteristics, (3) + training outcomes
local spec1
local spec2 `indi'
local spec3 `indi' `train'

// Panels, in output order (treatment dummy; the two second_stage groups compared):
//   A: Selection effect (G2 vs G3)                      -> G2, groups 2 and 3
//   B: Causal effect of career incentives (G3 vs. G4)   -> G3, groups 4 and 3
//   C: Causal effect of wage (G1 vs. G2)                -> G2, groups 2 and 1
//   D: Combined effect (G1 vs. G4)                      -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist attitude {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local a : word `p' of `panel_grp_a'
		local b : word `p' of `panel_grp_b'
		forvalues s = 1/3 {
			reg `v' `treat' `controls' `spec`s'' if second_stage==`a' | second_stage==`b', robust cl(hsa)
			outreg2 using "$result_path/tabA5_SPE.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}

// Outcome summary statistics by second-stage group
forvalues g = 1/4 {
	sum attitude if second_stage==`g'
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.6. Selection and incentive effects of work incentives on job performance: additional outcomes
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variables are wrong_rate, blankall_rate, surveytime2, notsurveytime2, and dailyworktime2

// 1. Analyses for the survey level variables first ( columns (1)-(4) & (7)-(10) )
// Proportion of entries incorrectly entered : wrong_rate
// Proportion of entries incorrectly blank : blankall_rate
// Survey time per household (in mins) : surveytime2
// Intermission time between surveys (in mins) : notsurveytime2
use "$data_path/household_level.dta", clear

// Control-variable sets
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local supervision
forvalues t = 1/5 {
	local supervision `supervision' after1_team`t' after2_team`t'
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Second-stage group dummies G1-G3 (group 4 is the omitted category)
char second_stage[omit] 4
xi i.second_stage
forvalues g = 1/3 {
	rename _Isecond_st_`g' G`g'
}

// Constant-only regression whose export (re)creates the output file; every real column
// below is appended after it.
// NOTE(review): this leaves a placeholder first column in tabA6_additional.xls - presumably removed by hand.
reg G2
outreg2 using "$result_path/tabA6_additional.xls", symbol(***, **, *) dec(3) excel replace 

// Controls common to all columns, and the coefficients hidden from the exported table
local controls `fixed_day' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings
local hide `indi' `fixed_day' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' i.num_hhmember_new num_hhmember_new num_siblings

// Additional controls per column: (1) none, (2) individual characteristics + training outcomes
local spec1
local spec2 `indi' `train'

// Panels, in output order (treatment dummy; the two second_stage groups compared):
//   A: Selection effect (G2 vs G3)                      -> G2, groups 2 and 3
//   B: Causal effect of career incentives (G3 vs. G4)   -> G3, groups 4 and 3
//   C: Causal effect of wage (G1 vs. G2)                -> G2, groups 2 and 1
//   D: Combined effect (G1 vs. G4)                      -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist wrong_rate blankall_rate surveytime2 notsurveytime2 {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local a : word `p' of `panel_grp_a'
		local b : word `p' of `panel_grp_b'
		forvalues s = 1/2 {
			reg `v' `treat' `controls' `spec`s'' if second_stage==`a' | second_stage==`b', robust cl(hsa)
			outreg2 using "$result_path/tabA6_additional.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}

// Outcome summary statistics by second-stage group
foreach v of varlist wrong_rate blankall_rate surveytime2 notsurveytime2 {
	forvalues g = 1/4 {
		sum `v' if second_stage==`g'
	}
}

// 2. Analysis for the enumerator-day level variable ( columns (5)-(6) )
// Work hours (in mins) : dailyworktime2
// All columns are appended to tabA6_additional.xls.

// Control-variable sets (defined before the collapse because its variable list reuses them)
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
// num_siblings is not repeated in demo: it already enters every specification as a baseline control.
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local supervision
forvalues t = 1/5 {
	local supervision `supervision' after1_team`t' after2_team`t'
}
// first_superv1-5 / second_superv1-5 are carried through the collapse but not used as regressors here
local superv_scores
foreach round in first second {
	forvalues t = 1/5 {
		local superv_scores `superv_scores' `round'_superv`t'
	}
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

//// Aggregate into enumerator-day level (one row per datenum-id pair, each variable replaced by its mean)
collapse (mean) quiz distance1 practice_error G1 G2 G3 dailyworktime2 avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe ///
	attitude num_dailysurvey second_stage hsa `fixed_day' age asset_score2 num_siblings `cog' `noncog' ///
	`superv_scores' num_hhmember_new malaria_hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new death_hsa_new ///
	error_rate `supervision' `hiv', by(datenum id) 
// datenum 1 is recoded to 2 and every value is then shifted down by one,
// so the first two datenum values end up sharing code 1.
replace datenum=2 if datenum==1
replace datenum=datenum-1
g id_workday=id*10000+datenum

// G1-G3 are carried through the collapse, so no xi call is needed here; the
// _Isecond_st_* dummies a second xi i.second_stage would create are never referenced.

// Controls common to all columns, and the coefficients hidden from the exported table
local controls `fixed_day' `hsa_new' `supervision' `hiv' num_hhmember_new num_siblings
local hide `indi' `fixed_day' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' i.num_hhmember_new num_hhmember_new num_siblings

// Additional controls per column: (1) none, (2) individual characteristics + training outcomes
local spec1
local spec2 `indi' `train'

// Panels, in output order (treatment dummy; the two second_stage groups compared):
//   A: Selection effect (G2 vs G3)                      -> G2, groups 2 and 3
//   B: Causal effect of career incentives (G3 vs. G4)   -> G3, groups 4 and 3
//   C: Causal effect of wage (G1 vs. G2)                -> G2, groups 2 and 1
//   D: Combined effect (G1 vs. G4)                      -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist dailyworktime2 {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local a : word `p' of `panel_grp_a'
		local b : word `p' of `panel_grp_b'
		forvalues s = 1/2 {
			reg `v' `treat' `controls' `spec`s'' if second_stage==`a' | second_stage==`b', robust cl(hsa)
			outreg2 using "$result_path/tabA6_additional.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}

// Outcome summary statistics by second-stage group
forvalues g = 1/4 {
	sum dailyworktime2 if second_stage==`g'
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Table A.7: Selection and incentive effects of work incentives on job performance after excluding 11 enumerators from the Wage group
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Dependent variables are error_rate (survey quality) and num_dailysurvey (survey quantity), SPE by survey respondents (pes3q), and SPE by supervisors (attitude).
use "$data_path/household_level.dta", clear

// Sample restriction for this table:
// first remove observations whose second_stage is missing or equal to 5,
drop if second_stage==. | second_stage==5
// then drop 11 wage group enumerators with the lowest training outcomes
// (first_stage==2 with total_3 at or below the cutoff, keeping id 3121023).
drop if total_3<=1.5420001 & first_stage==2 & id~=3121023 

// 1. Survey quality (columns (1)-(3) )
// Outcome variable: error_rate
// This block creates tabA7_without11.xls (first column uses "replace", the rest are appended).

// Control-variable sets
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
local supervision
forvalues t = 1/5 {
	local supervision `supervision' after1_team`t' after2_team`t'
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Second-stage group dummies G1-G3 (group 4 is the omitted category)
char second_stage[omit] 4
xi i.second_stage
forvalues g = 1/3 {
	rename _Isecond_st_`g' G`g'
}

// Controls common to all columns, and the coefficients hidden from the exported table
local controls `fixed_day' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings
local hide `indi' `fixed_day' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' num_siblings i.num_hhmember_new num_hhmember_new

// Additional controls per column: (1) none, (2) individual characteristics, (3) + training outcomes
local spec1
local spec2 `indi'
local spec3 `indi' `train'

// Panels, in output order (treatment dummy; the two second_stage groups compared):
//   A: Selection effect (G2 vs G3)                      -> G2, groups 2 and 3
//   B: Causal effect of career incentives (G3 vs. G4)   -> G3, groups 4 and 3
//   C: Causal effect of wage (G1 vs. G2)                -> G2, groups 2 and 1
//   D: Combined effect (G1 vs. G4)                      -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

local mode replace
forvalues p = 1/4 {
	local treat : word `p' of `panel_treat'
	local a : word `p' of `panel_grp_a'
	local b : word `p' of `panel_grp_b'
	forvalues s = 1/3 {
		reg error_rate `treat' `controls' `spec`s'' if second_stage==`a' | second_stage==`b', robust cl(hsa)
		outreg2 using "$result_path/tabA7_without11.xls", symbol(***, **, *) dec(3) excel `mode' drop(`hide')
		// only the very first column may overwrite the file
		local mode append
	}
}

// Outcome summary statistics by second-stage group
forvalues g = 1/4 {
	sum error_rate if second_stage==`g'
}

// 2. Survey quantity (number of surveys per day) (columns (4)-(6) )
// Outcome variable: num_dailysurvey
//// Aggregate into enumerator-day level
preserve
collapse (mean) quiz distance1 practice_error G1 G2 G3 avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe attitude num_dailysurvey second_stage hsa day1 day2 day3 day4 day5 day6 day7 day8 day9 day10 day11 day12 day13 day14 day15 day16 day17 day18 day19 day20 day21 day22 day23 day24 day25 day26 day27 day28 day29 day30 day31 day32 day33 day34 day35 day36 day37 day38 day39 day40 day41 day42 day43 day44 age asset_score2 num_siblings  ability_index31 ability_index32 rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences first_superv1 first_superv2 first_superv3 first_superv4 first_superv5 second_superv1 second_superv2 second_superv3 second_superv4 second_superv5 num_hhmember_new malaria_hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new death_hsa_new error_rate after1_team1 after2_team1 after1_team2 after2_team2 after1_team3 after2_team3 after1_team4 after2_team4 after1_team5 after2_team5 cct mcoffer2 eduoffer2, by(datenum id) 
replace datenum=2 if datenum==1
replace datenum=datenum-1
g id_workday=id*10000+datenum
// Control-variable groups for the num_dailysurvey columns of tabA7_without11.xls.
// The macro names below are reused by later sections, so they are kept as-is.

// day1 ... day44
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
// after1_team1 after2_team1 ... after1_team5 after2_team5
local supervision
forvalues t = 1/5 {
	local supervision `supervision' after1_team`t' after2_team`t'
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Regressors shared by every specification, in the order they enter -reg-
local base_ctrl `fixed_day' `hsa_new' `supervision' `hiv' num_hhmember_new num_siblings
// Coefficients suppressed in the exported table
local hide `indi' `fixed_day' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' num_siblings i.num_hhmember_new num_hhmember_new

// Three nested specifications per panel:
//   1 = base controls, 2 = + individual characteristics, 3 = + training outcomes
local spec1
local spec2 `indi'
local spec3 `indi' `train'

// Panels, read column-wise (treatment dummy / the two second_stage groups kept):
//   Panel A: Selection effect (G2 vs G3)                    -> G2, groups 2 and 3
//   Panel B: Causal effect of career incentives (G3 vs. G4)  -> G3, groups 4 and 3
//   Panel C: Causal effect of wage (G1 vs. G2)               -> G2, groups 2 and 1
//   Panel D: Combined effect (G1 vs. G4)                     -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist num_dailysurvey {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local ga : word `p' of `panel_grp_a'
		local gb : word `p' of `panel_grp_b'
		forvalues m = 1/3 {
			reg `v' `treat' `base_ctrl' `spec`m'' if second_stage==`ga' | second_stage==`gb', robust cl(hsa)
			outreg2 using "$result_path/tabA7_without11.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}
// Summary statistics of the outcome within each second-stage group (1 to 4),
// then return to the dataset saved by the matching -preserve- above.
foreach v of varlist num_dailysurvey {
	forvalues g = 1/4 {
		sum `v' if second_stage==`g'
	}
}
restore

// 3. Subjective performance evaluation (by survey respondents)  (columns (7)-(9) )
// Outcome variable: pes3q
// Strip trailing blanks from two enumerator names so each maps to a single category
// when the string is encoded below.
replace enumerator_pes="ISAIAH" if enumerator_pes=="ISAIAH  "
replace enumerator_pes="HENRY" if enumerator_pes=="HENRY "
// Numeric, value-labelled copy of the enumerator name.
encode enumerator_pes, generate(enu_pes)
// Tell -xi- to omit category 32 as the base group; the regressions below use
// _Ienu_pes_1 ... _Ienu_pes_31.
// NOTE(review): presumably enu_pes has exactly 32 categories after the cleanup above - confirm.
char enu_pes [omit] 32
// Create the _Ienu_pes_* indicator variables.
xi i.enu_pes

// Drop observations where the PES respondents are not the census respondents
// (implemented as: keep rows with pes2q equal to the string "1").
preserve
keep if pes2q=="1"
// pes3q is stored as a string; blank out the listed invalid entries, then convert to numeric.
foreach v of varlist pes3q {
	replace `v'="." if inlist(`v', "0", "6", "2015-06-03", "2015-06-02", "2015-06-22", "55", "9", "francis maliwa")
	destring `v', replace
}
// Control-variable groups for the pes3q columns of tabA7_without11.xls.

// _Ienu_pes_1 ... _Ienu_pes_31
local fixed_pes
forvalues k = 1/31 {
	local fixed_pes `fixed_pes' _Ienu_pes_`k'
}
// day1 ... day44
local fixed_day
forvalues d = 1/44 {
	local fixed_day `fixed_day' day`d'
}
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
// Defined here but not referenced by the regressions in this section.
local choi avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe partg1000
// after1_team1 after2_team1 ... after1_team5 after2_team5
local supervision
forvalues t = 1/5 {
	local supervision `supervision' after1_team`t' after2_team`t'
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Regressors shared by every specification, in the order they enter -reg-.
// NOTE(review): household size enters as i.num_hhmember_new here, whereas the
// num_dailysurvey and attitude regressions use num_hhmember_new linearly - confirm intended.
local base_ctrl `fixed_day' `fixed_pes' `hsa_new' `supervision' `hiv' i.num_hhmember_new num_siblings
// Coefficients suppressed in the exported table
local hide `indi' `fixed_day' `fixed_pes' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' num_siblings i.num_hhmember_new num_hhmember_new

// Three nested specifications per panel:
//   1 = base controls, 2 = + individual characteristics, 3 = + training outcomes
local spec1
local spec2 `indi'
local spec3 `indi' `train'

// Panels, read column-wise (treatment dummy / the two second_stage groups kept):
//   Panel A: Selection effect (G2 vs G3)                    -> G2, groups 2 and 3
//   Panel B: Causal effect of career incentives (G3 vs. G4)  -> G3, groups 4 and 3
//   Panel C: Causal effect of wage (G1 vs. G2)               -> G2, groups 2 and 1
//   Panel D: Combined effect (G1 vs. G4)                     -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist pes3q  {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local ga : word `p' of `panel_grp_a'
		local gb : word `p' of `panel_grp_b'
		forvalues m = 1/3 {
			reg `v' `treat' `base_ctrl' `spec`m'' if second_stage==`ga' | second_stage==`gb', robust cl(hsa)
			outreg2 using "$result_path/tabA7_without11.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}
// Summary statistics of pes3q within each second-stage group (1 to 4),
// then undo the PES sample restriction by restoring the preserved data.
forvalues g = 1/4 {
	sum pes3q if second_stage==`g'
}
restore

// 4. Subjective evaluation of work attitude (by supervisors) (columns (10)-(12) )
// Outcome variable: attitude

// Control-variable groups (defined before the collapse so the same lists can be reused in it).
local hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new malaria_hsa_new death_hsa_new distance1
local demo i.age asset_score2
local cog ability_index31 ability_index32
local noncog rosenberg2 intrinsic_value2 extrinsic_motivation2 extroversion agreeableness conscientiousness emotional_stability openness_to_experiences
// first_superv1 ... first_superv5 second_superv1 ... second_superv5
local supervision
foreach round in first second {
	forvalues s = 1/5 {
		local supervision `supervision' `round'_superv`s'
	}
}
local hiv cct mcoffer2 eduoffer2
local train quiz practice_error
local indi `demo' `cog' `noncog'

// Helper lists used only to spell out the collapse below
// day1 ... day44
local day_dummies
forvalues d = 1/44 {
	local day_dummies `day_dummies' day`d'
}
// after1_team1 after2_team1 ... after1_team5 after2_team5
local team_timing
forvalues t = 1/5 {
	local team_timing `team_timing' after1_team`t' after2_team`t'
}

//// Reduce sample size into enumerator-level: one row per id holding the mean of each variable.
// NOTE(review): i.age is used after this collapse, which presumably relies on age
// being constant within id - confirm.
collapse (mean) quiz distance1 practice_error G1 G2 G3 ///
	avgfrac_distant_tpe avgfrac_risk ccei_combined_tpe ///
	attitude num_dailysurvey second_stage hsa ///
	`day_dummies' ///
	age asset_score2 num_siblings ///
	`cog' `noncog' ///
	`supervision' ///
	num_hhmember_new malaria_hsa_new hh_hsa_enu asset_score_hsa_new birth_hsa_new death_hsa_new ///
	error_rate ///
	`team_timing' ///
	`hiv', by(id)

// Regressors shared by every specification, in the order they enter -reg-
local base_ctrl `supervision' `hsa_new' `hiv' num_hhmember_new num_siblings
// Coefficients suppressed in the exported table (`fixed_day' is whatever was defined earlier in the file)
local hide `indi' `fixed_day' `train' o.* `hiv' `hsa_new' `supervision' `demo' `cog' `noncog' num_siblings i.num_hhmember_new num_hhmember_new

// Three nested specifications per panel:
//   1 = base controls, 2 = + individual characteristics, 3 = + training outcomes
local spec1
local spec2 `indi'
local spec3 `indi' `train'

// Panels, read column-wise (treatment dummy / the two second_stage groups kept):
//   Panel A: Selection effect (G2 vs G3)                    -> G2, groups 2 and 3
//   Panel B: Causal effect of career incentives (G3 vs. G4)  -> G3, groups 4 and 3
//   Panel C: Causal effect of wage (G1 vs. G2)               -> G2, groups 2 and 1
//   Panel D: Combined effect (G1 vs. G4)                     -> G1, groups 4 and 1
local panel_treat G2 G3 G2 G1
local panel_grp_a 2 4 2 4
local panel_grp_b 3 3 1 1

foreach v of varlist attitude {
	forvalues p = 1/4 {
		local treat : word `p' of `panel_treat'
		local ga : word `p' of `panel_grp_a'
		local gb : word `p' of `panel_grp_b'
		forvalues m = 1/3 {
			reg `v' `treat' `base_ctrl' `spec`m'' if second_stage==`ga' | second_stage==`gb', robust cl(hsa)
			outreg2 using "$result_path/tabA7_without11.xls", symbol(***, **, *) dec(3) excel append drop(`hide')
		}
	}
}
// Summary statistics of attitude within each second-stage group (1 to 4)
forvalues g = 1/4 {
	sum attitude if second_stage==`g'
}



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Figure 2: Training Performance 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Panel A. Quiz score / Practice survey error rate
use "$data_path/enumerator_level.dta", clear

//// Quiz score
// Estimate one kernel density per first-stage group (stored as x#/fx#), overlay them,
// and save the graph; preserve/restore discards the temporary density variables.
preserve
forvalues g = 1/2 {
	kdensity quiz if first_stage==`g', nograph gen(x`g' fx`g')
}
twoway (line fx1 x1, lwidth(medthick)) ///
	(line fx2 x2, lwidth(medthick) lpattern(longdash)), ///
	xlabel(0(2)13) ylabel(0(.04).22) ///
	ytitle("Kernel density of quiz score") xtitle("Quiz score") ///
	legend(lab(1 "Internship group") lab(2 "Wage group"))
graph save Graph "$graph_path/fig2_A_quiz.gph", replace
restore

//// Practice survey error rate
// Same layout as the quiz-score panel: one kernel density per first-stage group,
// overlaid and saved; the temporary density variables are dropped by restore.
preserve
forvalues g = 1/2 {
	kdensity practice_error if first_stage==`g', nograph gen(x`g' fx`g')
}
twoway (line fx1 x1, lwidth(medthick)) ///
	(line fx2 x2, lwidth(medthick) lpattern(longdash)), ///
	xlabel(0(.1).9) ylabel(0(.5)3.1) ///
	ytitle("Kernel density of practice survey error rate") xtitle("Practice survey error rate") ///
	legend(lab(1 "Internship group") lab(2 "Wage group"))
graph save Graph "$graph_path/fig2_A_mock.gph", replace
restore

// Panel B. Selection Effect (G2 vs G3)
// Panel C. Causal Effect of Career Incentives (G3 vs G4)
// Panel D. Causal Effect of Wage Incentives (G2 vs G1)
use "$data_path/household_level.dta", clear

//// Survey quality (error rate)
preserve
// One kernel density of error_rate per second-stage group, stored as x#/fx#
forvalues g = 1/4 {
	kdensity error_rate if second_stage==`g', nograph gen(x`g' fx`g')
}
// Each panel overlays two groups: solid line first, long-dashed line second.
//   B: groups 2 and 3    C: groups 3 and 4    D: groups 1 and 2
local panel_tag B C D
local solid_grp 2 3 1
local dash_grp 3 4 2
forvalues k = 1/3 {
	local tag : word `k' of `panel_tag'
	local a : word `k' of `solid_grp'
	local b : word `k' of `dash_grp'
	// Only the part of each density with x below .15 is drawn
	twoway (line fx`a' x`a' if x`a'<.15, lwidth(medthick)) ///
		(line fx`b' x`b' if x`b'<.15, lwidth(medthick) lpattern(longdash)), ///
		xlabel(0(.05).15) ylabel(0(4)12) ///
		ytitle("Kernel density of error rate") xtitle("Survey quality (error rate)") ///
		legend(lab(1 "Group`a'") lab(2 "Group`b'"))
	graph save Graph "$graph_path/fig2_`tag'_qual.gph", replace
}
restore

//// Survey quantity (number of surveys per day)
preserve
// Average num_dailysurvey and second_stage within each (datenum, id) cell
collapse (mean) num_dailysurvey second_stage, by(datenum id)
// One kernel density per second-stage group, stored as x#/fx#
forvalues g = 1/4 {
	kdensity num_dailysurvey if second_stage==`g', nograph gen(x`g' fx`g')
}
// Each panel overlays two groups: solid line first, long-dashed line second.
//   B: groups 2 and 3    C: groups 3 and 4    D: groups 1 and 2
local panel_tag B C D
local solid_grp 2 3 1
local dash_grp 3 4 2
forvalues k = 1/3 {
	local tag : word `k' of `panel_tag'
	local a : word `k' of `solid_grp'
	local b : word `k' of `dash_grp'
	// Only the part of each density with x below 25 is drawn
	twoway (line fx`a' x`a' if x`a'<25, lwidth(medthick)) ///
		(line fx`b' x`b' if x`b'<25, lwidth(medthick) lpattern(longdash)), ///
		xlabel(0(5)25) ylabel(0(.03).09) ///
		ytitle("Kernel density of number of surveys per day") xtitle("Survey quantity (number of surveys per day)") ///
		legend(lab(1 "Group`a'") lab(2 "Group`b'"))
	graph save Graph "$graph_path/fig2_`tag'_quant.gph", replace
}
restore



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Figure A.5: Daily job performance trend
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use "$data_path/household_level.dta", clear
// Recode datenum 1 to 2, then shift every value down by one
// (so the original values 1 and 2 both become 1).
replace datenum=2 if datenum==1
replace datenum=datenum-1
// Drop rows with datenum above 13, and rows whose second_stage is missing or 5
drop if datenum>13 | second_stage==. | second_stage==5
// One row per (datenum, second_stage) with the mean and standard deviation of both outcomes
collapse (mean) error_rate num_dailysurvey ///
	(sd) error_rate_sd=error_rate num_dailysurvey_sd=num_dailysurvey, ///
	by(datenum second_stage)

//// Panel A: Selection Effect (G2 vs G3)
// Daily mean of each outcome for groups 2 and 3, with caps at mean +/- one standard deviation.
// error_rate is saved as the "qual" graph, num_dailysurvey as the "quant" graph.
preserve
local ytitle_error_rate Survey quality
local ytitle_num_dailysurvey Survey quantity
local tag_error_rate qual
local tag_num_dailysurvey quant
foreach v of varlist error_rate num_dailysurvey {
	gen up_`v'=`v'+`v'_sd
	gen lo_`v'=`v'-`v'_sd
	twoway (scatter `v' datenum if second_stage==2, connect(direct) lpattern(solid) lwidth(thick) lcolor(ltblue)) ///
		(scatter `v' datenum if second_stage==3, connect(direct) lpattern(shortdash) lcolor(red) lwidth(medthick)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==2, lwidth(thick) lcolor(ltblue)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==3, lcolor(red)), ///
		xlabel(1(2)13) xtitle(N th day from the first survey day) ///
		legend(order(1 "Group2" 2 "Group3")) ytitle(`ytitle_`v'')
	graph save Graph "$graph_path/figA5_A_`tag_`v''.gph", replace
}
restore

//// Panel B: Causal Effect of Career Incentives (G3 vs G4)
// Daily mean of each outcome for groups 3 and 4, with caps at mean +/- one standard deviation.
// error_rate is saved as the "qual" graph, num_dailysurvey as the "quant" graph.
preserve
local ytitle_error_rate Survey quality
local ytitle_num_dailysurvey Survey quantity
local tag_error_rate qual
local tag_num_dailysurvey quant
foreach v of varlist error_rate num_dailysurvey {
	gen up_`v'=`v'+`v'_sd
	gen lo_`v'=`v'-`v'_sd
	twoway (scatter `v' datenum if second_stage==3, connect(direct) lpattern(solid) lwidth(thick) lcolor(ltblue)) ///
		(scatter `v' datenum if second_stage==4, connect(direct) lpattern(shortdash) lcolor(red) lwidth(medthick)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==3, lwidth(thick) lcolor(ltblue)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==4, lcolor(red)), ///
		xlabel(1(2)13) xtitle(N th day from the first survey day) ///
		legend(order(1 "Group3" 2 "Group4")) ytitle(`ytitle_`v'')
	graph save Graph "$graph_path/figA5_B_`tag_`v''.gph", replace
}
restore

//// Panel C: Causal Effect of Wage Incentives (G2 vs G1)
// Daily mean of each outcome for groups 1 and 2, with caps at mean +/- one standard deviation.
// error_rate is saved as the "qual" graph, num_dailysurvey as the "quant" graph.
preserve
local ytitle_error_rate Survey quality
local ytitle_num_dailysurvey Survey quantity
local tag_error_rate qual
local tag_num_dailysurvey quant
foreach v of varlist error_rate num_dailysurvey {
	gen up_`v'=`v'+`v'_sd
	gen lo_`v'=`v'-`v'_sd
	twoway (scatter `v' datenum if second_stage==1, connect(direct) lpattern(solid) lwidth(thick) lcolor(ltblue)) ///
		(scatter `v' datenum if second_stage==2, connect(direct) lpattern(shortdash) lcolor(red) lwidth(medthick)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==1, lwidth(thick) lcolor(ltblue)) ///
		(rcap up_`v' lo_`v' datenum if second_stage==2, lcolor(red)), ///
		xlabel(1(2)13) xtitle(N th day from the first survey day) ///
		legend(order(1 "Group1" 2 "Group2")) ytitle(`ytitle_`v'')
	graph save Graph "$graph_path/figA5_C_`tag_`v''.gph", replace
}
restore



//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Figure A.6: Training outcomes
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
use "$data_path/enumerator_level.dta", clear

//// Panel A: Quiz
// Bar chart of the group mean of quiz by second_stage (groups 1-4), with caps at
// mean +/- t(n-1, .025) * sd / sqrt(n).
// NOTE(review): the legend text reads "95% confidence level"; the caps drawn are the
// bounds computed above.
preserve
drop if second_stage==5 | second_stage==.
foreach outcome of varlist quiz {
	// Group-level mean, non-missing count and standard deviation
	egen mean`outcome' = mean(`outcome'), by(second_stage)
	egen n`outcome' = count(`outcome'), by(second_stage)
	egen sd`outcome' = sd(`outcome'), by(second_stage)
	// Half-width of the interval, kept as text so both bounds use the identical expression
	local halfwidth invttail(n`outcome'-1,0.025) * sd`outcome' / sqrt(n`outcome')
	gen upper`outcome' = mean`outcome' + `halfwidth'
	gen lower`outcome' = mean`outcome' - `halfwidth'
	twoway (bar mean`outcome' second_stage, xlabel(1 "G1" 2 "G2" 3 "G3" 4 "G4", labsize(small)) ylabel(0(2)10) barwidth(0.7) xtitle("")) ///
		(rcap upper`outcome' lower`outcome' second_stage), ///
		legend(lab(1 "Quiz score") lab(2 "95% confidence level") holes(2))
	graph save Graph "$graph_path/figA6_quiz.gph", replace
}
restore

//// Panel B: Practice survey error rate
// Bar chart of the group mean of practice_error by second_stage (groups 1-4), with caps at
// mean +/- t(n-1, .025) * sd / sqrt(n).
// NOTE(review): the legend text reads "95% confidence level"; the caps drawn are the
// bounds computed above.
preserve
drop if second_stage==5 | second_stage==.
foreach outcome of varlist practice_error {
	// Group-level mean, non-missing count and standard deviation
	egen mean`outcome' = mean(`outcome'), by(second_stage)
	egen n`outcome' = count(`outcome'), by(second_stage)
	egen sd`outcome' = sd(`outcome'), by(second_stage)
	// Half-width of the interval, kept as text so both bounds use the identical expression
	local halfwidth invttail(n`outcome'-1,0.025) * sd`outcome' / sqrt(n`outcome')
	gen upper`outcome' = mean`outcome' + `halfwidth'
	gen lower`outcome' = mean`outcome' - `halfwidth'
	twoway (bar mean`outcome' second_stage, xlabel(1 "G1" 2 "G2" 3 "G3" 4 "G4", labsize(small)) ylabel(0(0.1).5) barwidth(0.7) xtitle("")) ///
		(rcap upper`outcome' lower`outcome' second_stage), ///
		legend(lab(1 "Practice survey error rate") lab(2 "95% confidence level") holes(2))
	graph save Graph "$graph_path/figA6_mock.gph", replace
}
restore
